# -*- coding: utf-8 -*-
"""
巴利文转换工具 - v3.5 修正版
修复内容：
1. 修正德宏傣文 'ai' 等双元音拆分错误 (如 vai -> ᥝᥭ)
2. 优化文件名正则表达式，防止编号前缀丢失
3. 保持 Python 3.14+ 兼容
"""

import os
import re
import sys
import threading
import tkinter as tk
from tkinter import filedialog, messagebox, scrolledtext
from pathlib import Path

# ====================================================================
# [关键补丁] Python 3.14+ 兼容
# ====================================================================
import ast
# Compatibility shim: when this interpreter's ``ast`` module no longer exposes
# the legacy literal node classes, alias each of them to ``ast.Constant``.
# Only ``Str`` is probed; its absence is taken to mean all five are missing.
# NOTE(review): presumably needed by a third-party dependency that still
# references the old names - confirm which one.
if not hasattr(ast, 'Str'):
    ast.Str = ast.Num = ast.NameConstant = ast.Bytes = ast.Ellipsis = ast.Constant

# ====================================================================
# [核心修复] 手动德宏傣文 (Tai Le) 转换引擎
# ====================================================================
# Ordered IAST -> Tai Le substitution table.
# The pairs are applied one after another with ``str.replace``, so ORDER
# MATTERS: every multi-character sequence must be consumed before any of its
# single-character components (e.g. 'ai' before 'a', 'th' before 't' / 'h').
# All keys are lowercase and in precomposed (NFC) form.
# NOTE(review): 'p'/'b' map to the same letter as 'ph'/'bh', unlike the
# k/kh and t/th pairs which use distinct letters - confirm this is intended.
_TAILE_REPLACEMENTS = (
    # 1. Diphthongs (must be handled first, before the plain vowels).
    ('ai', 'ᥭ'),
    ('au', 'ᥝ'),

    # 2. Digraphs / aspirated consonants.
    ('kh', 'ᥑ'), ('gh', 'ᥑ'),
    ('ch', 'ᥓ'), ('jh', 'ᥓ'),
    ('ṭh', 'ᥗ'), ('ḍh', 'ᥗ'),
    ('th', 'ᥗ'), ('dh', 'ᥗ'),
    ('ph', 'ᥚ'), ('bh', 'ᥚ'),
    ('ng', 'ᥒ'), ('ny', 'ᥢ'),

    # 3. Special signs.
    ('ṃ', 'ᥛ'), ('ṁ', 'ᥛ'),

    # 4. Single consonants.
    ('k', 'ᥐ'), ('g', 'ᥐ'),
    ('c', 'ᥓ'), ('j', 'ᥓ'),
    ('ṭ', 'ᥖ'), ('ḍ', 'ᥖ'),
    ('t', 'ᥖ'), ('d', 'ᥖ'),
    ('p', 'ᥚ'), ('b', 'ᥚ'),
    ('n', 'ᥢ'), ('ṅ', 'ᥒ'), ('ñ', 'ᥢ'), ('ṇ', 'ᥢ'),
    ('m', 'ᥛ'),
    ('y', 'ᥕ'), ('r', 'ᥞ'), ('l', 'ᥘ'), ('ḷ', 'ᥘ'),
    ('v', 'ᥝ'), ('w', 'ᥝ'),
    ('s', 'ᥔ'), ('h', 'ᥞ'),

    # 5. Single vowels (handled last).
    ('ā', 'ᥣ'), ('a', 'ᥣ'),
    ('ī', 'ᥫ'), ('i', 'ᥫ'),
    ('ū', 'ᥧ'), ('u', 'ᥧ'),
    ('e', 'ᥥ'), ('o', 'ᥨ'),
)


def manual_taile_converter(text):
    """Convert IAST-romanised Pali *text* to Tai Le script.

    The input is normalised to Unicode NFC and lowercased, then every pair
    of ``_TAILE_REPLACEMENTS`` is applied in table order. Characters that
    have no entry in the table (digits, punctuation, whitespace, ...) are
    passed through unchanged.

    Args:
        text: The IAST string to convert.

    Returns:
        The converted string.
    """
    # Function-scope import keeps this block self-contained.
    import unicodedata

    # Decomposed input ('a' + U+0304 instead of 'ā') would never match the
    # precomposed table keys and would leave a stray combining mark in the
    # output, so compose first.
    res = unicodedata.normalize('NFC', text).lower()

    for src, dst in _TAILE_REPLACEMENTS:
        res = res.replace(src, dst)

    return res

# ====================================================================
# Aksharamukha 包装器
# ====================================================================
def safe_convert(text, source='IAST', target='Thai'):
    """Transliterate *text* from *source* to *target* without ever raising.

    ``target == 'TaiLe'`` is served by the local ``manual_taile_converter``;
    every other target is delegated to ``aksharamukha.transliterate.process``.

    Args:
        text: The string to transliterate.
        source: Source script name passed to aksharamukha.
        target: Target script name, or ``'TaiLe'`` for the built-in converter.

    Returns:
        The transliterated string, or ``"[Error: <message>]"`` when anything
        goes wrong (including aksharamukha not being importable).
    """
    try:
        # Check the built-in target first: the Tai Le path does not need
        # aksharamukha, so it must keep working when that package is absent.
        if target == 'TaiLe':
            return manual_taile_converter(text)
        # Imported lazily so the rest of the module loads without the package.
        from aksharamukha import transliterate
        return transliterate.process(source, target, text)
    except Exception as e:
        return f"[Error: {e}]"

# 各语言转换接口
def convert_to_thai(text):
    """Return *text* transliterated from IAST to the 'Thai' target."""
    return safe_convert(text, source='IAST', target='Thai')


def convert_to_myanmar(text):
    """Return *text* transliterated from IAST to the 'Burmese' target."""
    return safe_convert(text, source='IAST', target='Burmese')


def convert_to_sinhala(text):
    """Return *text* transliterated from IAST to the 'Sinhala' target."""
    return safe_convert(text, source='IAST', target='Sinhala')


def convert_to_lao(text):
    """Return *text* transliterated from IAST to the 'Lao' target."""
    return safe_convert(text, source='IAST', target='Lao')


def convert_to_khmer(text):
    """Return *text* transliterated from IAST to the 'Khmer' target."""
    return safe_convert(text, source='IAST', target='Khmer')


def convert_to_shan(text):
    """Return *text* transliterated from IAST to the 'Shan' target."""
    return safe_convert(text, source='IAST', target='Shan')


def convert_to_taitham(text):
    """Return *text* transliterated from IAST to the 'TaiTham' target."""
    return safe_convert(text, source='IAST', target='TaiTham')


def convert_to_taile(text):
    """Return *text* converted to Tai Le by the built-in manual converter."""
    return manual_taile_converter(text)

# ====================================================================
# [修复] 文件名处理
# ====================================================================
def convert_filename(original_filename, suffix, converter):
    """Build the output file name for one converted script.

    The trailing ``.pali.txt`` marker is removed from *original_filename*.
    If the remainder starts with a prefix made of letters, digits, ``.`` or
    ``-`` followed by an underscore (e.g. ``T18n0848_``), that prefix is kept
    verbatim and only the part after it is passed through *converter*;
    otherwise the whole base name is converted.

    Args:
        original_filename: File name (not a path) of the source file.
        suffix: Script code inserted before ``.txt`` in the result.
        converter: Callable mapping a string to its transliteration.

    Returns:
        ``"<prefix><converted>.<suffix>.txt"``. Falls back to
        ``"<base name>.<suffix>.txt"`` (unconverted) when the converter
        raises, returns a non-string, returns an ``"[Error: ...]"`` marker
        string, or when nothing usable is left after sanitising.
    """
    # Strip the marker only where it is a real suffix, not wherever it
    # happens to occur inside the name.
    pali_ext = '.pali.txt'
    if original_filename.endswith(pali_ext):
        base_name = original_filename[:-len(pali_ext)]
    else:
        base_name = original_filename
    fallback = f"{base_name}.{suffix}.txt"

    # Everything up to and including the first underscore counts as the
    # prefix, as long as it contains only letters, digits, '.' and '-'.
    match = re.match(r'^([a-zA-Z0-9\.\-]+_)(.+)$', base_name)
    if match:
        prefix, pali_name = match.groups()  # e.g. 'T18n0848_', '<name>'
    else:
        prefix, pali_name = '', base_name

    try:
        converted_name = converter(pali_name)
    except Exception:
        return fallback

    # Converters in this file report failure by returning "[Error: ...]"
    # instead of raising; never let that text end up in a file name.
    if not isinstance(converted_name, str) or converted_name.startswith('[Error:'):
        return fallback

    # Drop characters that are illegal in file names, then collapse any
    # whitespace runs (including newlines) to single spaces.
    converted_name = re.sub(r'[\\/*?:"<>|]', '', converted_name)
    converted_name = re.sub(r'\s+', ' ', converted_name).strip()
    if not converted_name:
        return fallback

    return f"{prefix}{converted_name}.{suffix}.txt"

# ====================================================================
# 批量处理逻辑
# ====================================================================
def process_pali_file(input_file, log_callback, selected_scripts):
    """Convert one Pali text file into every selected script.

    The file is read with the first encoding that decodes it, then for each
    code in *selected_scripts* the content is converted and written as UTF-8
    next to the input file, under the name produced by ``convert_filename``.

    Args:
        input_file: Path (``str`` or ``Path``) of the source file.
        log_callback: Callable taking one string; receives progress and
            error messages.
        selected_scripts: Iterable of script codes (``'thai'``, ``'lao'``,
            ...). Unknown codes are skipped silently.

    Returns:
        List of the output file paths (as strings) that were written. A
        failure for one script is logged and does not stop the others.
    """
    input_file = Path(input_file)
    # 'utf-8-sig' comes first: it decodes plain UTF-8 as well and strips a
    # leading BOM, which a plain 'utf-8' attempt would leave in the content.
    # 'latin-1' accepts any byte sequence, so it acts as the last resort.
    encodings = ['utf-8-sig', 'gbk', 'latin-1']
    content = None

    for enc in encodings:
        try:
            with open(input_file, 'r', encoding=enc) as f:
                content = f.read()
            break
        except UnicodeDecodeError:
            continue
        except OSError as exc:
            # Missing file, permission problem, ...: report it instead of
            # letting the exception escape into the caller.
            log_callback(f"❌ 读取失败: {input_file.name} ({exc})\n")
            return []

    if content is None:
        log_callback(f"❌ 读取失败: {input_file.name}\n")
        return []

    # script code -> (display name used in the log, converter function)
    script_map = {
        'thai':      ('泰文', convert_to_thai),
        'myanmar':   ('缅文', convert_to_myanmar),
        'sinhala':   ('僧伽罗文', convert_to_sinhala),
        'lao':       ('老挝文', convert_to_lao),
        'khmer':     ('高棉文', convert_to_khmer),
        'taile':     ('德宏傣文', convert_to_taile),
        'shan':      ('掸文', convert_to_shan),
        'taitham':   ('兰纳文', convert_to_taitham),
    }

    generated_files = []

    for code in selected_scripts:
        entry = script_map.get(code)
        if entry is None:
            continue
        display_name, converter = entry

        try:
            converted_content = converter(content)
            # The converters report failure by returning "[Error: ...]"
            # rather than raising. Treat that as a failure so the marker is
            # not written to disk and logged as a success.
            if (converted_content.startswith('[Error:')
                    and not content.startswith('[Error:')):
                raise RuntimeError(converted_content)

            new_filename = convert_filename(input_file.name, code, converter)
            output_file = input_file.parent / new_filename

            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(converted_content)

            generated_files.append(str(output_file))
            log_callback(f"  ✅ {display_name:8s} → {new_filename}\n")
        except Exception as e:
            log_callback(f"  ❌ {display_name:8s} 错误: {str(e)}\n")

    return generated_files

# ====================================================================
# GUI
# ====================================================================
class PaliConverterGUI:
    """Tkinter front end for the batch converter.

    The window offers one checkbox per target script, a button that asks for
    a directory and converts every ``*pali.txt`` file found below it, a log
    pane, and a small built-in Tai Le test.

    The batch itself runs in ``worker`` on a daemon thread started by
    ``select_directory``.
    NOTE(review): ``worker`` updates widgets from that background thread;
    routing those updates through ``root.after`` or a queue polled on the
    main thread would be more robust - consider for a follow-up.
    """

    def __init__(self, root):
        """Create all widgets inside *root* (the application's ``tk.Tk``)."""
        self.root = root
        self.root.title("巴利文转换工具 v3.5 (修正版)")
        self.root.geometry("820x680")

        tk.Label(root, text="巴利文多文字转换工具",
                 font=("Microsoft YaHei", 16, "bold")).pack(pady=10)
        tk.Label(root, text="v3.5 更新：修正德宏文拼写错误 & 文件名丢失前缀问题",
                 fg="#e67e22").pack(pady=5)

        # --- script selection -------------------------------------------
        select_frame = tk.LabelFrame(root, text="选择文字",
                                     font=("Microsoft YaHei", 10, "bold"))
        select_frame.pack(pady=10, padx=20, fill=tk.X)

        # script code -> BooleanVar backing that script's checkbox
        self.script_vars = {}
        # (script code, checkbox label) in display order
        self.scripts_config = [
            ('thai', '泰文 (Thai)'),
            ('myanmar', '缅文 (Myanmar)'),
            ('sinhala', '僧伽罗文 (Sinhala)'),
            ('lao', '老挝文 (Lao)'),
            ('khmer', '高棉文 (Khmer)'),
            ('taile', '⭐ 德宏傣文 (Tai Le)'),
            ('shan', '✅ 掸文 (Shan)'),
            ('taitham', '✅ 兰纳文 (Tai Tham)'),
        ]

        for i, (code, name) in enumerate(self.scripts_config):
            var = tk.BooleanVar(value=True)
            self.script_vars[code] = var
            # Labels carrying a marker emoji are drawn in green.
            fg_c = "#27ae60" if "⭐" in name or "✅" in name else "#2c3e50"
            cb = tk.Checkbutton(select_frame, text=name, variable=var,
                                font=("Microsoft YaHei", 9), fg=fg_c)
            # Three checkboxes per row.
            cb.grid(row=i // 3, column=i % 3, sticky="w", padx=10, pady=5)

        btn_frame = tk.Frame(select_frame)
        btn_frame.grid(row=10, column=0, columnspan=3, pady=10)
        tk.Button(btn_frame, text="全选",
                  command=lambda: self.toggle_all(True)).pack(side=tk.LEFT, padx=5)
        tk.Button(btn_frame, text="清空",
                  command=lambda: self.toggle_all(False)).pack(side=tk.LEFT, padx=5)

        # --- start button and status line --------------------------------
        self.select_button = tk.Button(
            root, text="📂 选择目录开始转换", command=self.select_directory,
            font=("Microsoft YaHei", 12, "bold"), bg="#3498db", fg="white",
            padx=20, pady=8)
        self.select_button.pack(pady=10)
        self.status_label = tk.Label(root, text="准备就绪", fg="#95a5a6")
        self.status_label.pack()

        # --- log pane ----------------------------------------------------
        log_frame = tk.Frame(root)
        log_frame.pack(fill=tk.BOTH, expand=True, padx=15, pady=5)
        self.log_text = scrolledtext.ScrolledText(
            log_frame, wrap=tk.WORD, font=("Consolas", 10), bg="#f8f9fa")
        self.log_text.pack(fill=tk.BOTH, expand=True)

        # --- bottom buttons ----------------------------------------------
        bottom_frame = tk.Frame(root)
        bottom_frame.pack(pady=10)
        tk.Button(bottom_frame, text="测试转换", command=self.run_test,
                  bg="#27ae60", fg="white").pack(side=tk.LEFT, padx=10)
        tk.Button(bottom_frame, text="清空日志", command=self.clear_log,
                  bg="#e74c3c", fg="white").pack(side=tk.LEFT, padx=10)

    def toggle_all(self, state):
        """Set every script checkbox to *state* (``True`` / ``False``)."""
        for var in self.script_vars.values():
            var.set(state)

    def log(self, msg):
        """Append *msg* to the log pane and scroll to the end."""
        self.log_text.insert(tk.END, msg)
        self.log_text.see(tk.END)

    def clear_log(self):
        """Remove all text from the log pane."""
        # Text indices are "line.column" strings; "1.0" is the very start.
        self.log_text.delete("1.0", tk.END)

    def get_selected_codes(self):
        """Return the script codes whose checkbox is ticked, in display order."""
        return [c for c, v in self.script_vars.items() if v.get()]

    def run_test(self):
        """Log the Tai Le conversion of a fixed sample word."""
        self.clear_log()
        self.log("🧪 测试德宏文修复效果:\n")
        text = "Mahāvairocana"
        self.log(f"原文: {text}\n")
        self.log(f"德宏傣文: {manual_taile_converter(text)}\n")
        self.log(f"  (期望: ᥛᥣᥞᥣᥝᥭᥨᥞᥓᥢᥣ - 注意 'ai' 应为 'ᥭ' 而非 'ᥣᥫ')\n")

    def select_directory(self):
        """Ask for a directory and start converting it in the background."""
        folder = filedialog.askdirectory()
        if not folder:
            return
        selected = self.get_selected_codes()
        if not selected:
            # Nothing ticked: a batch run would silently produce no output.
            messagebox.showwarning("提示", "请至少选择一种文字")
            return
        # Disable here, on the main thread, so a quick second click cannot
        # start a second batch before the worker gets to run.
        self.select_button.config(state=tk.DISABLED)
        t = threading.Thread(target=self.worker, args=(folder, selected),
                             daemon=True)
        t.start()

    def worker(self, folder, selected):
        """Convert every ``*pali.txt`` file below *folder*.

        Args:
            folder: Directory to search recursively.
            selected: Script codes to generate for each file.

        The start button is disabled for the duration of the run and is
        always re-enabled afterwards, even if the run fails.
        """
        self.select_button.config(state=tk.DISABLED)
        try:
            self.clear_log()
            self.log(f"📂 目录: {folder}\n开始处理...\n")
            pali_files = list(Path(folder).rglob('*pali.txt'))
            if not pali_files:
                self.log("未找到 pali.txt 文件\n")
                return
            total = len(pali_files)
            for i, p_file in enumerate(pali_files, 1):
                self.status_label.config(text=f"处理: {i}/{total}")
                self.log(f"📄 {p_file.name}\n")
                process_pali_file(p_file, self.log, selected)
                self.log("\n")
            self.log("🎉 完成！\n")
            self.status_label.config(text="完成")
        except Exception as exc:
            # Without this the thread would die silently and the button
            # would stay disabled until the program is restarted.
            self.log(f"❌ 处理中断: {exc}\n")
            self.status_label.config(text="出错")
            return
        finally:
            self.select_button.config(state=tk.NORMAL)
        messagebox.showinfo("完成", "转换结束")

def main():
    """Create the main window and run the Tk event loop until it is closed."""
    # Probe the optional transliteration backend up front. A failure is
    # reported on stderr instead of being swallowed silently; the GUI still
    # starts because the Tai Le converter does not need the package.
    # ``Exception`` (not just ``ImportError``) is caught on purpose: the
    # package may be installed but fail while importing.
    try:
        import aksharamukha  # noqa: F401
    except Exception as exc:
        print(f"[警告] 无法导入 aksharamukha，仅德宏傣文转换可用: {exc}",
              file=sys.stderr)
    root = tk.Tk()
    PaliConverterGUI(root)
    root.mainloop()


if __name__ == "__main__":
    main()
